from bs4 import BeautifulSoup
import pymongo
import datetime
import requests


def parse_url_to_soup(url):
    """Fetch *url* and return its HTML as a BeautifulSoup tree (lxml parser).

    A browser User-Agent header is sent because the target site rejects
    the default requests client.

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"
    }
    # timeout keeps the crawler from hanging forever on a stalled connection
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()  # fail fast rather than parsing an error page
    return BeautifulSoup(r.content.decode('utf-8'), 'lxml')


def parse_li_to_data_list(li_list):
    """Extract post metadata from a list of forum-listing ``<li>`` tags.

    Each ``li`` is expected to contain a title link (``a.truetit``), an
    author block (``div.author`` with two links: author, create time),
    a "replies / views" counter (``span.ansour.box``) and a last-reply
    block (``div.endreply``).

    Returns:
        list[dict]: one dict per post with keys post_title, post_href,
        author, author_href, create_time, reply_num, view_num,
        last_reply_user, last_reply_time (all string values).
    """
    data_list = []
    for li in li_list:
        # Hoist each repeated tag lookup so the tree is searched only once.
        title_a = li.find('a', class_='truetit')
        author_links = li.find('div', class_='author').find_all('a')
        end_reply = li.find('div', class_='endreply')
        # Counter text looks like "replies / views".
        counts = li.find('span', class_='ansour box').text.strip().split('/')
        data_list.append({
            'post_title': title_a.text.strip(),
            'post_href': title_a['href'],
            'author': author_links[0].text,
            'author_href': author_links[0]['href'],
            'create_time': author_links[1].text,
            'reply_num': counts[0].strip(),
            'view_num': counts[1].strip(),
            'last_reply_user': end_reply.find('span').text,
            'last_reply_time': end_reply.find('a').text.strip(),
        })
    return data_list


# --- Crawl pages 1-9 of the Hupu BXJ board and store posts in MongoDB. ---
# NOTE(review): credentials are hard-coded; move them to env vars / config.
client = pymongo.MongoClient('192.168.1.156', 27017)
spider = client.spider
# NOTE(review): Database.authenticate is deprecated (removed in pymongo 4);
# prefer passing username/password to MongoClient once the driver is upgraded.
spider.authenticate("root", "lotut@mongodb")
post = spider.post
for page in range(1, 10):
    link = 'https://bbs.hupu.com/bxj-' + str(page)
    soup = parse_url_to_soup(link)
    ul = soup.find('ul', class_='for-list')
    if ul is None:
        # Layout changed or the page was blocked — skip instead of crashing.
        continue
    data_list = parse_li_to_data_list(ul.find_all('li'))
    if data_list:
        # insert_many: one round-trip per page; Collection.insert was
        # deprecated in pymongo 3.0 and removed in 4.0.
        post.insert_many(data_list)


